In [2]:
from SPARQLWrapper import SPARQLWrapper, JSON
import networkx as x, nltk as k
import string, time, __builtin__
import cPickle as pickle
In [19]:
# --- Notebook configuration -------------------------------------------------
# Base URL of the SPARQL server and the three dataset names hosted on it.
URL_ENDPOINT = "http://localhost:82"
DATASET1 = "participabr"
DATASET2 = "aa"
DATASET3 = "cidadedemocratica"

# Query URLs have the form "<endpoint>/<dataset>/query".
URL_QUERY1 = "/".join((URL_ENDPOINT, DATASET1, "query"))
URL_QUERY2 = "/".join((URL_ENDPOINT, DATASET2, "query"))
URL_QUERY3 = "/".join((URL_ENDPOINT, DATASET3, "query"))

# ASCII punctuation plus left/right curly quotes, bullet and en dash.
# NOTE(review): presumably the characters stripped from text by later cells -- confirm.
EXCLUDE = set(string.punctuation + u'\u201c\u2018\u201d\u2022\u2013')

# Portuguese stopword list shipped with NLTK (requires the "stopwords" corpus).
STOPWORDS = set(k.corpus.stopwords.words('portuguese'))

# Namespace declarations prepended to every SPARQL query in this notebook.
PREFIX = """PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX ops: <http://purl.org/socialparticipation/ops#>
PREFIX opa: <http://purl.org/socialparticipation/opa#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX dcty: <http://purl.org/dc/dcmitype/>
PREFIX tsioc: <http://rdfs.org/sioc/types#>
PREFIX sioc: <http://rdfs.org/sioc/ns#>
PREFIX schema: <http://schema.org/>
PREFIX aa: <http://purl.org/socialparticipation/aa/>
PREFIX ocd: <http://purl.org/socialparticipation/ocd/>"""

# NLTK's RSLP stemmer (a stemmer for Portuguese).
stemmer = k.stem.RSLPStemmer()
In [59]:
# Fetch the URI and name of every ocd:User from the third dataset's endpoint
# (URL_QUERY3) and report how long the round trip took.
NOW = time.time()
q = """SELECT DISTINCT ?s ?name
WHERE {
?s a ocd:User .
?s ocd:name ?name
}"""
sparql2 = SPARQLWrapper(URL_QUERY3)
sparql2.setReturnFormat(JSON)  # JSON results; later cells index them like a dict
sparql2.setQuery(PREFIX + q)
results = sparql2.query().convert()
print("%.2f segundos para puxar todos os sujeitos" % (time.time() - NOW))
In [30]:
# Pull the ?name value out of every result binding, then preview the first ten.
nomes = [binding["name"]["value"]
         for binding in results["results"]["bindings"]]
for nome in nomes[:10]:
    print(nome)
In [60]:
# Fetch every ocd:User with its name and, where available, its mailbox (?em)
# and phone (?ph). Both contact fields are OPTIONAL, so a binding may lack them.
NOW = time.time()
q2 = """SELECT DISTINCT ?s ?name ?em ?ph
WHERE {
?s a ocd:User .
?s ocd:name ?name .
OPTIONAL { ?s ocd:mbox ?em }
OPTIONAL { ?s ocd:phone ?ph }
}"""
sparql2 = SPARQLWrapper(URL_QUERY3)
sparql2.setReturnFormat(JSON)
sparql2.setQuery(PREFIX + q2)
results = sparql2.query().convert()
print("%.2f segundos para puxar todos os sujeitos com telefone e email"
      % (time.time() - NOW))
In [51]:
# Preview name, e-mail and phone for the first 15 users.
# ?em and ?ph are OPTIONAL in the query, so a binding may lack either key;
# missing fields are printed as empty strings.
for result in results["results"]["bindings"][:15]:
    em, ph = "", ""
    if "em" in result:
        # Strip the leading URI scheme (presumably "mailto:" -- confirm against
        # the data). maxsplit=1 with [-1] keeps the whole value when there is
        # no colon (the previous split(":")[1] raised IndexError) and does not
        # truncate values that contain further colons.
        em = result["em"]["value"].split(":", 1)[-1]
    if "ph" in result:
        ph = result["ph"]["value"]
    # Single-argument print(...) gives the same space-separated line under
    # Python 2 and remains valid syntax under Python 3.
    print("%s %s %s" % (result["name"]["value"], em, ph))
In [141]:
# Fetch one row per (observatory, tag) pair: observatory URI, owner name,
# creation date (?data), optional update date (?data2) and tag text.
#
# ORDER BY ?ob makes all rows of one observatory contiguous and the row order
# reproducible. SPARQL leaves result order undefined without ORDER BY, and the
# downstream cell that compares each row's ?ob with the previous row's relies
# on that contiguity to group tags.
NOW = time.time()
q2 = """SELECT ?ob ?name ?data ?data2 ?tag
WHERE {
?ob a ocd:Observatory .
?ob ocd:created ?data .
OPTIONAL { ?ob ocd:updated ?data2 . }
?ob ocd:user ?user .
?user ocd:name ?name .
?tagg ocd:tagged ?ob .
?tagg ocd:text ?tag .
}
ORDER BY ?ob"""
sparql2 = SPARQLWrapper(URL_QUERY3)
sparql2.setQuery(PREFIX + q2)
sparql2.setReturnFormat(JSON)
results = sparql2.query().convert()
print("%.2f segundos para puxar todos os sujeitos" % (time.time() - NOW))
# Legend for the rows printed by the following cells.
print("observatorio do usuario x atualizado em y, tags: z")
In [131]:
# Preview the first five (observatory, tag) rows.
for res in results["results"]["bindings"][:5]:
    # The line is labelled "atualizado em" (updated on), so prefer the
    # ocd:updated value (?data2). It is OPTIONAL in the query; fall back to the
    # ocd:created value (?data) when the observatory was never updated.
    if "data2" in res:
        data = res["data2"]["value"]
    else:
        data = res["data"]["value"]
    # Show only the fragment after "#"; [-1] keeps the full URI instead of
    # raising IndexError when the URI has no "#".
    ob_id = res["ob"]["value"].split("#", 1)[-1]
    print("observatorio %s de %s, atualizado em %s possui a tag: %s"
          % (ob_id, res["name"]["value"], data, res["tag"]["value"]))
In [146]:
# Print rows 360-379 grouped by observatory: a full header line (preceded by a
# blank line) when the observatory differs from the previous row's, and a short
# "tag only" line when it is the same. `mem` holds the previous row's URI.
mem = ""
for row in results["results"]["bindings"][360:380]:
    ob_uri = row["ob"]["value"]
    tag = row["tag"]["value"]
    if ob_uri != mem:
        # Prefer the optional update date (?data2), else the creation date.
        date_key = "data2" if "data2" in row else "data"
        data = row[date_key]["value"]
        exp = ("\nobservatorio %s de %s, atualizado em %s possui a tag: %s"
               % (ob_uri.split("#")[1], row["name"]["value"], data, tag))
    else:
        exp = "possui a tag: %s" % (tag,)
    print(exp)
    mem = ob_uri
In [186]:
# Fetch one row per (observatory, owner name) with all of the observatory's
# tag texts concatenated server-side into ?tags, separated by ", ".
NOW = time.time()
q2 = """SELECT ?ob ?name (GROUP_CONCAT(?tag; SEPARATOR = ", ") as ?tags)
WHERE {
?ob a ocd:Observatory .
?ob ocd:user ?user .
?user ocd:name ?name .
?tagg ocd:tagged ?ob .
?tagg ocd:text ?tag .
} GROUP BY ?ob ?name"""
sparql2 = SPARQLWrapper(URL_QUERY3)
sparql2.setReturnFormat(JSON)
sparql2.setQuery(PREFIX + q2)
results = sparql2.query().convert()
print("%.2f segundos para puxar todos os sujeitos" % (time.time() - NOW))
print("observatorio do usuario x atualizado em y, tags: z")
In [188]:
# Preview the first five grouped rows: owner name followed by the concatenated
# tags, with a blank line after each entry (the trailing "\n").
for row in results["results"]["bindings"][:5]:
    owner = row["name"]["value"]
    tags = row["tags"]["value"]
    print("%s, TAGS: %s\n" % (owner, tags))